* Session setup: empty the data in memory, enable -pause-, turn off the
* --more-- prompt, and set matsize to 10000.
clear
pause on
set more off
set matsize 10000

******************************************************************************************
* Name: 	summary-stats.do
* Purpose: 	this do file uses merged election data to produce summary stats
* Input:	ca-election.dta / ca-election-full.dta
******************************************************************************************

*****************************
* Set locals
*****************************

* incum: 0 loads ca-election, 1 loads ca-election-full (data with incumbents)
local incum 0

* both: 1 restricts the sample to races with both candidates present
local both 0

* Tag used in the log file name, built from the two options above
local suffix "incum`incum'-both`both'"

* Close any log left open, then start a text log named after the options
capture log close
log using summary-stats-`suffix'.log, text replace

* Load the dataset selected by incum
if `incum'==0 {
	use ca-election, clear
}
if `incum'==1 {
	use ca-election-full, clear
}

*****************************
* Sample definition
*****************************

* The restrictions below are applied only when the full file is loaded;
* the asserts afterwards check the same conditions for either input file.
if `incum'==1 {
	* Candidates with no first_election value
	* (original note: first election after 2006)
	drop if first_election==.
	count

	* Candidates whose first race had a single candidate
	drop if first_num_cand==1
	count

	* Candidates without an office category
	drop if officecat==.
	count

	* Names that can't be classified (ambiguous gender or not in the names
	* database): code 999 is recoded to missing, then missing sex is dropped
	replace sex=. if sex==999
	drop if sex==.
}

* Stop the run if any of the key variables is still missing
foreach v in first_election officecat sex {
	assert `v'!=.
}

count

* Optional restriction: races where both winner and loser are part of the
* sample (non-incumbent, first observed)
if `both'==1 {
	local folder "both-cand"

	* Among observations with a non-missing first_margin, count how many
	* OTHER observations share the same first_raceid
	duplicates tag first_raceid if first_margin!=., generate(tag_both_multi)

	* Exactly one duplicate means exactly two such candidates in the race;
	* observations with a missing tag (missing first_margin) are dropped too
	keep if tag_both_multi==1
	count
}

* Define regression controls: first election year, county
egen county = group(CNTYNAME)
local controls = "i.first_election i.county"

* Define clustering
local cluster_var = "county"

* Merge in female representation at the county and county-office level, from 1995-7
merge m:1 CNTYNAME officecat2 using female-rep.dta

* Report the match status before anything is dropped
tab _m

* Rows that exist only in female-rep.dta (_m==2) are county-office cells with
* no candidate in the sample. They carry missing candidate variables and a
* missing county id (county was built before the merge), so they must not
* stay in the candidate-level dataset.
drop if _m==2
drop _m
 
* Trim the dataset to the variables listed here (same wildcard patterns as
* before, grouped over several lines)
keep first* female same cityname citypop ///
	run_atall run8_atall run_again win* margin_next ///
	county* count* officecat* officecat2* office* ///
	high* mean* place*

* One indicator variable per value of officecat2, named office_group1, office_group2, ...
tabulate officecat2, generate(office_group)

* Give the occupation-group variable a shorter name and attach value labels.
* All 33 codes are defined in a single -label define- call.
rename first_occ_group occ_group
label define occ_group ///
	1  "management" ///
	2  "business/financial operations" ///
	3  "computer/mathematical" ///
	4  "architecture/engineering" ///
	5  "life/physical/social science" ///
	6  "community/social service" ///
	7  "legal" ///
	8  "education/training/library" ///
	9  "arts/design/entertainment/sports/media" ///
	10 "healthcare practitioners" ///
	11 "healthcare support" ///
	12 "protective service" ///
	13 "food prep/serving related" ///
	14 "building/grounds cleaning and maintenance" ///
	15 "personal care/service" ///
	16 "sales" ///
	17 "office/admin support" ///
	18 "farming/fishing/forestry" ///
	19 "construction/extraction" ///
	20 "installation/maintenance/repair" ///
	21 "production" ///
	22 "transportation" ///
	23 "material moving" ///
	24 "military" ///
	25 "student" ///
	26 "govt employee" ///
	27 "business owner" ///
	28 "parent/homemaker" ///
	29 "retired" ///
	30 "not classified" ///
	31 "officeholder" ///
	32 "incumbent" ///
	33 "none listed"
label values occ_group occ_group

************************************
* Summary stats
* Table 1
* Figures A.1, A.2
************************************

* Throughout this section each statistic is produced twice: once for the
* whole sample and once for observations with a non-missing first_margin.

* Number of unique races, overall and by sex
unique first_raceid
unique first_raceid if female==0
unique first_raceid if female==1

unique first_raceid if first_margin!=.
unique first_raceid if first_margin!=. & female==0
unique first_raceid if first_margin!=. & female==1

* Election outcomes, office and occupation, by sex
foreach v in first_elected run_atall win_uncond win officecat2 occ_group {
	tab `v' female, col row
	tab `v' female if first_margin!=., col row
}

* Same tables, separated by winners/losers of the first race.
* The office and occupation tables are split by first_elected as well;
* without the prefix they only repeated the pooled tables above.
foreach v in run_atall win_uncond win officecat2 occ_group {
	bys first_elected: tab `v' female, col row
	bys first_elected: tab `v' female if first_margin!=., col row
}

* Win rate among close losing candidates who run again.
* Observations with a missing first_margin fail the <= test and are excluded.
* NOTE(review): 0.103 is presumably the bandwidth used elsewhere in the
* analysis -- confirm against the estimation do-files.
bys female: summ win if first_elected==0 & run_atall==1 & abs(first_margin)<=0.103

*******************
* Transition matrix
* Table A.1
* Figure A.3
*******************

* Three broad offices (regardless of place), shared by both variables below
label define off3 1 "City Council" 2 "School Board" 3 "Other"

* Collapse officecat2: 3 -> 1, 5 -> 2, {1, 2, 4, 6} -> 3, anything else missing
generate off3 = 1 if officecat2==3
replace off3 = 2 if officecat2==5
replace off3 = 3 if inlist(officecat2, 1, 2, 4, 6)
label values off3 off3

* Same coding applied to officecat2_next
generate off3_next = 1 if officecat2_next==3
replace off3_next = 2 if officecat2_next==5
replace off3_next = 3 if inlist(officecat2_next, 1, 2, 4, 6)
label values off3_next off3

* All novices: distribution of same, overall and by outcome / sex
tabulate same
bysort first_elected: tabulate same
bysort first_elected female: tabulate same

* All novices: same by broad office (row percentages)
tabulate off3 same, row
bysort first_elected: tabulate off3 same, row
bysort female: tabulate off3 same, row

* Marginal: the same tables for observations with a non-missing first_margin
tabulate same if first_margin!=.
bysort first_elected: tabulate same if first_margin!=.
bysort first_elected female: tabulate same if first_margin!=.

tabulate off3 same if first_margin!=., row
bysort first_elected: tabulate off3 same if first_margin!=., row
bysort female: tabulate off3 same if first_margin!=., row

* All by female by outcome (first_elected = 0, then 1)
forvalues won = 0/1 {
	bysort female: tabulate off3 same if first_elected==`won', row
}

* Marginal by female by outcome (first_elected = 0, then 1)
forvalues won = 0/1 {
	bysort female: tabulate off3 same if first_margin!=. & first_elected==`won', row
}

* All novices, transitions among office categories
tabulate off3 off3_next
bysort first_elected: tabulate off3 off3_next
